from selenium import webdriver
from lxml import etree
import csv
import time
import math
# browserDriver = webdriver.Chrome()
# browserDriver.get('http://www.baidu.com')
# browserDriver.find_element_by_id("kw").send_keys('郑州暴雨')
# browserDriver.find_element_by_id("su").click()

def main():
  """Script entry point: start the ranking scrape."""
  get_ranking()

def get_ranking():
  """Pass the hard-coded shanghairanking.cn ranking page URL to get_page_source."""
  ranking_url = 'https://www.shanghairanking.cn/rankings/bcmr/2021/080701'
  get_page_source(ranking_url)
 


def get_page_source(url):
  """Open ``url`` in Chrome, parse the page HTML and pass it to parse_data.

  @input url    # address of the page to load
  @return None  # the parsed tree is handed to parse_data, nothing is returned
  """
  browserDriver = webdriver.Chrome()
  try:
    browserDriver.get(url)
    # NOTE(review): the source is read right after get() returns; content the
    # page renders later via JavaScript may not be present yet - confirm.
    htmlSource = browserDriver.page_source
  finally:
    # Always end the WebDriver session, even when loading the page fails, so
    # no browser/driver process is left behind. quit() is used instead of
    # close() because close() only closes the current window.
    browserDriver.quit()
  # The browser is no longer needed once the HTML string has been captured.
  html_data = etree.HTML(htmlSource)
  parse_data(html_data)


def parse_data(html_data):
  """Extract one record per ranking row and save all records with save_csv.

  @input html_data  # parsed HTML tree that supports .xpath()
  @return None      # the collected records are passed to save_csv

  Each record holds the first text node matched for the school name,
  province and score. A row where any of these XPath queries matches
  nothing raises IndexError.
  """
  # (record key, XPath relative to the row element), in output order.
  field_paths = (
    ('school_name', './/div[1]/div[3]/div[2]/div/div/div/a/text()'),
    ('school_province', './/div[1]/div[4]/text()'),
    ('school_score', './/div[1]/div[5]/text()'),
  )
  row_nodes = html_data.xpath('//*[@id="content-box"]/div[2]/div/div[2]/div')

  records = [
    {key: row.xpath(path)[0] for key, path in field_paths}
    for row in row_nodes
  ]

  save_csv(records)
  
def save_csv(dict, **other):
  """Append a list of row dictionaries to a UTF-8 encoded CSV file.

  @input dict      # list of dictionaries, one per CSV row (the parameter name
                   # shadows the builtin but is kept for existing callers)
  @input fileName  # optional keyword: output file name without the '.csv'
                   # suffix; when omitted the current Unix timestamp is used
                   # and the chosen name is printed
  @return None

  The header row is written only when the target file is still empty, so
  appending to an existing file does not repeat the header between data rows.
  Empty input is skipped without creating a file.
  """
  if not dict:
    # No first row to derive the header from; nothing to write.
    print('no data to write')
    return
  if 'fileName' in other:
    file_name = other['fileName'] + '.csv'
  else:
    file_name = str(int(time.time())) + '.csv'
    print('fileName:', file_name)
  header = get_header(dict)
  with open(file_name, 'a', newline='', encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=header)
    # In append mode the stream starts at the end of the file, so position 0
    # means the file is new or empty and still needs its header.
    if f.tell() == 0:
      writer.writeheader()
    writer.writerows(dict)
    print("数据写入完成")
##读取文件
# readDict={}
# with open(fileName,"rb") as csv_file:
#     reader=csv.reader(csv_file)
#     readDict=dict(reader)

def get_header(dict):
  """Build the CSV header from the keys of the first row dictionary.

  @input dict   # list of dictionaries that will be saved
  @return list  # sorted keys of the first dictionary, used as the CSV
                # header; an empty list when there are no rows
  """
  if not dict:
    # Avoid IndexError on dict[0] when there is nothing to save.
    return []
  return sorted(dict[0])

# Run the scraper only when executed as a script, so importing this module
# (e.g. to reuse save_csv) does not launch a browser.
if __name__ == "__main__":
  main()